/* Figure 2: event study */
/* Assign libref EDU to the replication data folder. The steps in this program
   read edu.hotmajor, edu.hotmajorall and edu.enrolment through this libref. */
/* NOTE(review): the path begins with !userprofile, which looks like SAS's
   !environment-variable path syntax (Windows USERPROFILE) - presumably this
   only resolves on a Windows host; confirm before running elsewhere. */
libname edu '!userprofile\\Dropbox\Education\Replication\Data';

/* identify treated majors: keep only the rows of edu.hotmajor flagged hot = 1 */
data hot;
  set edu.hotmajor;
  /* drop every row that is not flagged (including rows where hot is missing) */
  if hot ne 1 then delete;
run;

/* keep only the first year of each run of consecutive hot years for a major */

/* The previous-row comparison below is only meaningful when rows are ordered
   by major and then year, so make that ordering explicit. */
proc sort data = hot;
  by major year;
run;

data hot (drop = _prev_major _prev_year);
  set hot;

  /* LAG only pushes/pops its queue when it is actually executed. Calling it
     unconditionally, exactly once per observation, guarantees that the values
     returned are those of the immediately preceding row; calling it inside a
     compound IF condition does not give that guarantee. */
  _prev_major = lag(major);
  _prev_year  = lag(year);

  /* same major as the previous row and exactly one year later:
     continuation of an existing hot spell, not a new start */
  if major = _prev_major and year = _prev_year + 1 then delete;

  /* computer sciences was hot from 1988 - 1989, 1990 was below median,
     so a 1991 row is treated as part of the earlier spell and removed */
  /* NOTE(review): this removes a 1991 start year for every major, not only
     computer sciences - confirm no other major has a genuine 1991 start. */
  if year = 1991 then delete;
run;

/* manual overrides: move selected start years back to the start of the trend */
data hot;
  set hot;
  /* each listed year is recoded once; none of the new values is itself a key,
     so the mapping cannot cascade */
  select (year);
    when (1971) year = 1967;
    when (1988) year = 1986;
    when (1996) year = 1993;
    when (2018) year = 2016;
    otherwise;
  end;
run;

/* add back electrical engineering (mapped to same industries as computer sciences):
   clone the major = 7 events and relabel the copies as major 10 */
data hot2;
  set hot;
  if major ne 7 then delete;
  major = 10;
  /* majortitle keeps the length it has in HOT */
  /* NOTE(review): if that length is shorter than this literal the title is
     truncated - confirm. */
  majortitle = "Electrical engineering";
run;

/* stack the relabelled copies underneath the original events */
proc append base = hot data = hot2;
run;

/* treated majors: enrolment years in the window t-5 to t+10 around each start year */
proc sql;
  create table hotall as
  select evt.year as baseyear,
         evt.major,
         evt.majortitle,
         enr.year_ending
  from hot as evt
       inner join edu.enrolment as enr
       on evt.major = enr.majorcode
  where enr.year_ending - evt.year between -5 and 10;
quit;

/* get control majors */
/* step 1: list the majors ranked in the top 5 (num <= 5) in edu.hotmajorall;
   these are used to kick out controls that are top 5 around the event year */
data hotmajorall;
  set edu.hotmajorall;
  /* rows with a missing num are retained, exactly as with "if num <= 5" */
  if num > 5 then delete;
run;

/* For every treated event (baseyear, major) list each major that appears in
   HOTMAJORALL within +/- 2 years of the base year. These (event, topmajor)
   pairs are the controls to exclude.

   The pairing deliberately has no join key on major: every event is matched
   against every top-major row, restricted only by the year distance.

   Built in one SELECT DISTINCT so that the table is never created from
   itself (which triggers PROC SQL's recursive-reference warning) and the
   un-deduplicated cross product is never written out. */
proc sql;
  create table ineligible as
  select distinct
         a.baseyear,
         a.major,
         a.majortitle,
         b.major as topmajor
  from hotall as a, hotmajorall as b
  where b.year - a.baseyear between -2 and 2;
quit;

/* candidate controls: every major with an enrolment record in the event's base year */
proc sql;
  create table control as
  select evt.year as baseyear,
         evt.major,
         evt.majortitle,
         enr.majorcode as control
  from hot as evt
       inner join edu.enrolment as enr
       on evt.year = enr.year_ending;
quit;

/* restrict the candidates to the fixed list of major codes used in the analysis */
/* NOTE(review): nothing in this step removes the treated major itself from its
   own candidate set - confirm the later top-5 exclusion always covers it. */
data control;
  set control;
  if control in (1, 4, 6, 7, 8, 9, 10, 13, 14, 16);
run;

/* flag candidate controls that are on the event's ineligible list:
   topmajor is filled in only where a matching ineligible row exists */
proc sql;
  create table control2 as
  select cand.*, bad.topmajor
  from control as cand
       left join ineligible as bad
       on  cand.baseyear = bad.baseyear
       and cand.major    = bad.major
       and cand.control  = bad.topmajor;
quit;

/* keep only the unflagged candidates and discard the helper column */
data control (drop = topmajor);
  set control2;
  if topmajor = .;
run;

/* control majors: enrolment years in the window t-5 to t+10 around the event's base year */
proc sql;
  create table controlall as
  select ctl.*, enr.year_ending
  from control as ctl
       inner join edu.enrolment as enr
       on ctl.control = enr.majorcode
  where enr.year_ending - ctl.baseyear between -5 and 10;
quit;

/* get log bachelors after taking out major and year fixed effects */

/* estimation sample: the fixed list of major codes, three columns only */
data enrolment (keep = year_ending majorcode log_bachelor);
  set edu.enrolment;
  if majorcode in (1, 4, 6, 7, 8, 9, 10, 13, 14, 16);
run;

/* regress log_bachelor on year and major dummies (both declared as CLASS
   variables) and write the residuals to R_ENROLMENT as variable r_enrolment */
proc surveyreg data = enrolment;
  class year_ending majorcode;
  model log_bachelor = year_ending majorcode / solution adjrsq;
  output out = r_enrolment residual = r_enrolment;
run;

/* attach the residual to each treated (major, year_ending) row */
proc sql;
  create table hotall2 as
  select trt.*, res.r_enrolment
  from hotall as trt
       inner join r_enrolment as res
       on  trt.year_ending = res.year_ending
       and trt.major       = res.majorcode;
quit;

/* attach the residual to each control (control major, year_ending) row */
proc sql;
  create table controlall2 as
  select ctl.*, res.r_enrolment
  from controlall as ctl
       inner join r_enrolment as res
       on  ctl.year_ending = res.year_ending
       and ctl.control     = res.majorcode;
quit;

/* construct Figure 2 */

/* Rewrite dataset &ds in place, adding year = year_ending - baseyear
   (event time relative to the base year). */
%macro add_event_time(ds);
  data &ds;
    set &ds;
    year = year_ending - baseyear;
  run;
%mend add_event_time;

%add_event_time(hotall2)
%add_event_time(controlall2)

/* mean control residual for each event-time year (BY processing needs sorted input) */
proc sort data = controlall2;
  by year;
run;

/* one output row per year; the mean of r_enrolment is stored as CONTROL */
proc summary data = controlall2;
  by year;
  var r_enrolment;
  output out = controlall3 mean(r_enrolment) = control;
run;

/* bring the year-specific control mean onto every treated row */
proc sql;
  create table net as
  select trt.*, ctl.control
  from hotall2 as trt
       inner join controlall3 as ctl
       on trt.year = ctl.year;
quit;

/* net residual: treated residual minus the control mean for the same event-time year */
data netall2;
  set net;
  r_enrolment = r_enrolment - control;
run;

/* series for the figure: mean and standard deviation of the net residual by
   event-time year; the result replaces the earlier NET table */
proc sort data = netall2;
  by year;
run;

proc summary data = netall2;
  by year;
  var r_enrolment;
  output out = net
    mean(r_enrolment) = r_enrolment
    std(r_enrolment)  = stddev_enrolment;
run;
